import numpy as np
import pandas as pd
# Load the dataset from the CSV file in the current working directory.
df = pd.read_csv("india.csv")

# Preview the first five rows to confirm the columns parsed as expected.
df.head(n=5)
| | Region | Date | Frequency | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | Area |
|---|---|---|---|---|---|---|---|
| 0 | Andhra Pradesh | 31-05-2019 | Monthly | 3.65 | 11999139.0 | 43.24 | Rural |
| 1 | Andhra Pradesh | 30-06-2019 | Monthly | 3.05 | 11755881.0 | 42.05 | Rural |
| 2 | Andhra Pradesh | 31-07-2019 | Monthly | 3.75 | 12086707.0 | 43.50 | Rural |
| 3 | Andhra Pradesh | 31-08-2019 | Monthly | 3.32 | 12285693.0 | 43.97 | Rural |
| 4 | Andhra Pradesh | 30-09-2019 | Monthly | 5.17 | 12256762.0 | 44.68 | Rural |
# Inspect the last five rows; this is where trailing empty records show up.
df.tail(n=5)
| | Region | Date | Frequency | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | Area |
|---|---|---|---|---|---|---|---|
| 763 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 764 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 765 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 766 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 767 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# (row count, column count) of the loaded frame.
df.shape
(768, 7)
# Print each column's dtype and non-null count; a non-null count lower than
# the number of index entries means some rows are missing values.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 7 columns):
 #   Column                                   Non-Null Count  Dtype
---  ------                                   --------------  -----
 0   Region                                   740 non-null    object
 1   Date                                     740 non-null    object
 2   Frequency                                740 non-null    object
 3   Estimated Unemployment Rate (%)          740 non-null    float64
 4   Estimated Employed                       740 non-null    float64
 5   Estimated Labour Participation Rate (%)  740 non-null    float64
 6   Area                                     740 non-null    object
dtypes: float64(3), object(4)
memory usage: 42.1+ KB
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()
| | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) |
|---|---|---|---|
| count | 740.000000 | 7.400000e+02 | 740.000000 |
| mean | 11.787946 | 7.204460e+06 | 42.630122 |
| std | 10.721298 | 8.087988e+06 | 8.111094 |
| min | 0.000000 | 4.942000e+04 | 13.330000 |
| 25% | 4.657500 | 1.190404e+06 | 38.062500 |
| 50% | 8.350000 | 4.744178e+06 | 41.160000 |
| 75% | 15.887500 | 1.127549e+07 | 45.505000 |
| max | 76.740000 | 4.577751e+07 | 72.570000 |
# Region label of every row, as a Series.
x = df.loc[:, 'Region']
x
0 Andhra Pradesh
1 Andhra Pradesh
2 Andhra Pradesh
3 Andhra Pradesh
4 Andhra Pradesh
...
763 NaN
764 NaN
765 NaN
766 NaN
767 NaN
Name: Region, Length: 768, dtype: object
# Unemployment rate of every row, as a Series.
# NOTE: the column label begins with a space; it must be matched
# character-for-character or the lookup raises KeyError.
y = df.loc[:, ' Estimated Unemployment Rate (%)']
y
0 3.65
1 3.05
2 3.75
3 3.32
4 5.17
...
763 NaN
764 NaN
765 NaN
766 NaN
767 NaN
Name: Estimated Unemployment Rate (%), Length: 768, dtype: float64
# Select the column at position 3 (zero-based) as a Series. Per the
# df.info() listing this is the unemployment-rate column, i.e. the same
# data as `y`, reached by position instead of by label.
df2 = df[df.columns[3]]
df2
0 3.65
1 3.05
2 3.75
3 3.32
4 5.17
...
763 NaN
764 NaN
765 NaN
766 NaN
767 NaN
Name: Estimated Unemployment Rate (%), Length: 768, dtype: float64
import plotly.express as px
# `plt` is the conventional alias for the pyplot interface, not for the bare
# `matplotlib` package (which does not expose plt.plot / plt.show).
import matplotlib.pyplot as plt
# State-wise views of the unemployment rate, one figure per chart type.
#
# The bar chart used to be built and shown twice (the two copies differed
# only by a missing space in the first title); it is now rendered once.
# All charts share the same columns, colouring, template and axis ordering,
# so they are driven from a single table instead of copy-pasted cells.
region_col = 'Region'
# The label begins with a space; it must be matched exactly.
rate_col = ' Estimated Unemployment Rate (%)'

chart_specs = [
    (px.bar, 'Unemployment Rate (State Wise) by Bar Graph'),
    (px.box, 'Unemployment Rate (Statewise) by Box Plot'),
    (px.scatter, 'Unemployment Rate (Statewise) by Scatter Plot'),
    (px.histogram, 'Unemployment Rate (Statewise) by Histogram'),
]

for make_chart, chart_title in chart_specs:
    fg = make_chart(
        df,
        x=region_col,
        y=rate_col,
        color=region_col,
        title=chart_title,
        template='plotly',
    )
    # Order the regions on the x axis by their total y value, largest first.
    fg.update_layout(xaxis={'categoryorder': 'total descending'})
    fg.show()